[Autogluon] 불균형 데이터

Author

김보람

Published

February 1, 2024

imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import networkx as nx
import sklearn
import xgboost as xgb
import pickle 

# sklearn
from sklearn import model_selection # split함수이용
from sklearn import ensemble # RF,GBM
from sklearn import metrics
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

# gnn
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.nn import GCNConv

# autogluon
from autogluon.tabular import TabularDataset, TabularPredictor
/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_geometric/typing.py:18: UserWarning: An issue occurred while importing 'pyg-lib'. Disabling its usage. Stacktrace: /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/libpyg.so: undefined symbol: _ZN2at4_ops12split_Tensor4callERKNS_6TensorEN3c106SymIntEl
  warnings.warn(f"An issue occurred while importing 'pyg-lib'. "
/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_geometric/typing.py:31: UserWarning: An issue occurred while importing 'torch-scatter'. Disabling its usage. Stacktrace: /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_scatter/_scatter_cuda.so: undefined symbol: _ZNK3c107SymBool10guard_boolEPKcl
  warnings.warn(f"An issue occurred while importing 'torch-scatter'. "
/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_geometric/typing.py:42: UserWarning: An issue occurred while importing 'torch-sparse'. Disabling its usage. Stacktrace: /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_sparse/_diag_cuda.so: undefined symbol: _ZN3c106detail19maybe_wrap_dim_slowIlEET_S2_S2_b
  warnings.warn(f"An issue occurred while importing 'torch-sparse'. "
def throw(df, fraud_rate):
    """Downsample normal transactions so frauds make up `fraud_rate` of the result.

    Keeps every fraud row (is_fraud == 1), samples a matching fraction of the
    normal rows (is_fraud == 0) with a fixed seed, and concatenates the two.
    """
    fraud = df[df['is_fraud'] == 1].copy()
    normal = df[df['is_fraud'] == 0].copy()
    # fraction of normal rows to keep so frauds end up at exactly fraud_rate
    keep_frac = (len(fraud) * (1 - fraud_rate)) / (len(normal) * fraud_rate)
    sampled_normal = normal.sample(frac=keep_frac, random_state=42)
    return pd.concat([fraud, sampled_normal])

def split_dataframe(data_frame, test_fraud_rate, test_rate=0.3):
    """Split into train/test where the test set has a prescribed fraud ratio.

    test_fraud_rate : desired fraction of fraud rows inside the test set.
    test_rate       : fraction of all rows that go into the test set.

    Sampling is unseeded (non-deterministic), matching the original behavior.
    """
    total = len(data_frame)
    test_size = int(total * test_rate)
    n_fraud_test = int(test_fraud_rate * (total * test_rate))
    n_normal_test = test_size - n_fraud_test

    frauds = data_frame[data_frame['is_fraud'] == 1]
    normals = data_frame[data_frame['is_fraud'] == 0]

    # draw the required number of fraud / normal rows for the test split
    picked_fraud = frauds.sample(n=n_fraud_test, replace=False)
    picked_normal = normals.sample(n=n_normal_test, replace=False)
    test_data = pd.concat([picked_normal, picked_fraud])

    # everything not selected for test becomes the training set
    train_data = data_frame.loc[~data_frame.index.isin(test_data.index)]
    return train_data, test_data

def concat(df_tr, df_tst):
    """Stack train and test frames; return boolean masks locating each part.

    Returns (df, (train_mask, test_mask)) where the masks are aligned with
    the row order of the concatenated frame (train rows first).
    """
    n_tr, n_tst = len(df_tr), len(df_tst)
    df = pd.concat([df_tr, df_tst])
    train_mask = np.concatenate([np.full(n_tr, True), np.full(n_tst, False)])
    test_mask = np.concatenate([np.full(n_tr, False), np.full(n_tst, True)])
    return df, (train_mask, test_mask)

def evaluation(y, yhat):
    """Return a one-row DataFrame of accuracy/precision/recall/F1/ROC-AUC.

    NOTE(review): roc_auc_score is computed on hard labels (yhat), not
    probability scores — kept as-is to preserve the original behavior.
    """
    # `scorers` deliberately avoids shadowing the `metrics` module
    # imported at the top of the file (`from sklearn import metrics`).
    scorers = [sklearn.metrics.accuracy_score,
               sklearn.metrics.precision_score,
               sklearn.metrics.recall_score,
               sklearn.metrics.f1_score,
               sklearn.metrics.roc_auc_score]
    return pd.DataFrame({s.__name__: [s(y, yhat).round(6)] for s in scorers})

def compute_time_difference(group):
    """Return [[idx_i, idx_j, |dt| in seconds], ...] for every ordered pair
    of rows in `group`.

    Self-pairs (i == j) are included and yield a difference of 0.0; the
    caller filters them out via the exp-weight mask.
    """
    stamped = [(row.name, row.trans_date_trans_time) for _, row in group.iterrows()]
    return [
        [name_i, name_j, abs((t_i - t_j).total_seconds())]
        for name_i, t_i in stamped
        for name_j, t_j in stamped
    ]

def edge_index_save(df, unique_col, theta, gamma):
    groups = df.groupby(unique_col)
    edge_index = np.array([item for sublist in (compute_time_difference(group) for _, group in groups) for item in sublist])
    edge_index = edge_index.astype(np.float64)
    filename = f"edge_index_attempt{self.save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"

    while os.path.exists(filename):
        self.save_attempt += 1
        filename = f"edge_index_attempt{self.save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
    np.save(filename, edge_index)
    #tetha = edge_index_plust_itme[:,].mean()


    edge_index[:,2] = (np.exp(-edge_index[:,2]/(theta)) != 1)*(np.exp(-edge_index[:,2]/(theta))).tolist()
    edge_index = torch.tensor([(int(row[0]), int(row[1])) for row in edge_index if row[2] > gamma], dtype=torch.long).t()
    return edge_index

def edge_index(df, unique_col, theta, gamma):
    """Build a temporal edge index without saving anything to disk.

    Same as `edge_index_save` minus the .npy dump: rows grouped by
    `unique_col` are pairwise-connected, weighted by exp(-dt/theta), and
    pairs whose weight exceeds `gamma` become edges. (The commented-out
    save logic — which referenced a nonexistent `self` — was removed.)

    Returns a torch.LongTensor of shape (2, num_edges).
    """
    groups = df.groupby(unique_col)
    pairs = np.array(
        [item for sublist in (compute_time_difference(g) for _, g in groups)
         for item in sublist]
    ).astype(np.float64)

    # weight = exp(-dt/theta); the (!= 1) factor zeroes self-pairs (dt == 0)
    weights = np.exp(-pairs[:, 2] / theta)
    pairs[:, 2] = (weights != 1) * weights
    return torch.tensor(
        [(int(r[0]), int(r[1])) for r in pairs if r[2] > gamma],
        dtype=torch.long,
    ).t()
# Load the preprocessed fraudTrain DataFrame from the parent directory.
# NOTE(review): pickle.load executes arbitrary code from the file —
# only load pickles you created yourself.
with open('../fraudTrain.pkl', 'rb') as file:
    fraudTrain = pickle.load(file)    

Autogluon (df02: using only the `amt` feature)

# Keep only the transaction amount as the single feature, plus the label.
fraudTrain = fraudTrain[["amt","is_fraud"]]
# Earlier draft of this cell as a reusable function (kept for reference):
# def auto(df,test_fraud_rate):
#     df_tr, df_tst = split_dataframe(df, test_fraud_rate)
#     tr = TabularDataset(df_tr)
#     tst = TabularDataset(df_tst)
#     predictr = TabularPredictor("is_fraud")
#     predictr.fit(tr, presets='best_quality')
#     y = tst.is_fraud
#     yhat = predictr.predict(tst)
#     result = evaluation(y,yhat)
#     return result
# Downsample normals so frauds are 30% of the data, then split so the
# test set is 50% fraud.
df = throw(fraudTrain, 0.3)
df_tr, df_tst = split_dataframe(df, 0.5)
tr = TabularDataset(df_tr)
tst = TabularDataset(df_tst)
# Label column "is_fraud"; AutoGluon infers a binary classification task.
predictr = TabularPredictor("is_fraud")
predictr.fit(tr, presets='best_quality')
y = tst.is_fraud
yhat = predictr.predict(tst)
result1 = evaluation(y,yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240202_071009/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240202_071009/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   590.08 GB / 982.82 GB (60.0%)
Train Data Rows:    14014
Train Data Columns: 1
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    48787.66 MB
    Train Data (Original)  Memory Usage: 0.11 MB (0.0% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Types of features in original data (raw dtype, special dtypes):
        ('float', []) : 1 | ['amt']
    Types of features in processed data (raw dtype, special dtypes):
        ('float', []) : 1 | ['amt']
    0.0s = Fit runtime
    1 features in original data used to generate 1 features in processed data.
    Train Data (Processed) Memory Usage: 0.11 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.03s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
    0.917    = Validation score   (accuracy)
    0.0s     = Training   runtime
    0.02s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
    0.9084   = Validation score   (accuracy)
    0.0s     = Training   runtime
    0.02s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9234   = Validation score   (accuracy)
    0.38s    = Training   runtime
    0.03s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9261   = Validation score   (accuracy)
    0.76s    = Training   runtime
    0.03s    = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.9022   = Validation score   (accuracy)
    0.44s    = Training   runtime
    0.26s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.9022   = Validation score   (accuracy)
    0.51s    = Training   runtime
    0.24s    = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9247   = Validation score   (accuracy)
    1.77s    = Training   runtime
    0.01s    = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9068   = Validation score   (accuracy)
    0.34s    = Training   runtime
    0.28s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.907    = Validation score   (accuracy)
    0.36s    = Training   runtime
    0.28s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9259   = Validation score   (accuracy)
    12.19s   = Training   runtime
    0.14s    = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9246   = Validation score   (accuracy)
    0.5s     = Training   runtime
    0.02s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9259   = Validation score   (accuracy)
    13.51s   = Training   runtime
    0.07s    = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9261   = Validation score   (accuracy)
    1.0s     = Training   runtime
    0.02s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9261   = Validation score   (accuracy)
    2.94s    = Training   runtime
    0.02s    = Validation runtime
AutoGluon training complete, total runtime = 45.78s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240202_071009/")
predictr.evaluate(tst)
Evaluation: accuracy on test data: 0.8519813519813519
Evaluations on test data:
{
    "accuracy": 0.8519813519813519,
    "balanced_accuracy": 0.8519813519813519,
    "mcc": 0.7293218533016024,
    "roc_auc": 0.9617966670913722,
    "f1": 0.8297261061099407,
    "precision": 0.9765554553651938,
    "recall": 0.7212787212787213
}
{'accuracy': 0.8519813519813519,
 'balanced_accuracy': 0.8519813519813519,
 'mcc': 0.7293218533016024,
 'roc_auc': 0.9617966670913722,
 'f1': 0.8297261061099407,
 'precision': 0.9765554553651938,
 'recall': 0.7212787212787213}
predictr._trainer.model_graph.nodes
NodeView(('KNeighborsUnif_BAG_L1', 'KNeighborsDist_BAG_L1', 'LightGBMXT_BAG_L1', 'LightGBM_BAG_L1', 'RandomForestGini_BAG_L1', 'RandomForestEntr_BAG_L1', 'CatBoost_BAG_L1', 'ExtraTreesGini_BAG_L1', 'ExtraTreesEntr_BAG_L1', 'NeuralNetFastAI_BAG_L1', 'XGBoost_BAG_L1', 'NeuralNetTorch_BAG_L1', 'LightGBMLarge_BAG_L1', 'WeightedEnsemble_L2'))
predictr.fit_summary()
*** Summary of fit() ***
Estimated performance of each model:
                      model  score_val  pred_time_val   fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0      LightGBMLarge_BAG_L1   0.926145       0.023825   1.000465                0.023825           1.000465            1       True         13
1           LightGBM_BAG_L1   0.926145       0.030142   0.756562                0.030142           0.756562            1       True          4
2       WeightedEnsemble_L2   0.926145       0.045217   3.698785                0.015075           2.942223            2       True         14
3     NeuralNetTorch_BAG_L1   0.925931       0.068089  13.514495                0.068089          13.514495            1       True         12
4    NeuralNetFastAI_BAG_L1   0.925860       0.144026  12.193488                0.144026          12.193488            1       True         10
5           CatBoost_BAG_L1   0.924718       0.005455   1.765058                0.005455           1.765058            1       True          7
6            XGBoost_BAG_L1   0.924575       0.019437   0.499822                0.019437           0.499822            1       True         11
7         LightGBMXT_BAG_L1   0.923362       0.034598   0.377455                0.034598           0.377455            1       True          3
8     KNeighborsUnif_BAG_L1   0.917012       0.015843   0.003800                0.015843           0.003800            1       True          1
9     KNeighborsDist_BAG_L1   0.908377       0.015021   0.003333                0.015021           0.003333            1       True          2
10    ExtraTreesEntr_BAG_L1   0.907022       0.277414   0.361435                0.277414           0.361435            1       True          9
11    ExtraTreesGini_BAG_L1   0.906807       0.276092   0.336341                0.276092           0.336341            1       True          8
12  RandomForestEntr_BAG_L1   0.902169       0.239527   0.511043                0.239527           0.511043            1       True          6
13  RandomForestGini_BAG_L1   0.902169       0.255825   0.442638                0.255825           0.442638            1       True          5
Number of models trained: 14
Types of models trained:
{'StackerEnsembleModel_CatBoost', 'StackerEnsembleModel_LGB', 'StackerEnsembleModel_TabularNeuralNetTorch', 'StackerEnsembleModel_RF', 'StackerEnsembleModel_NNFastAiTabular', 'StackerEnsembleModel_XGBoost', 'StackerEnsembleModel_XT', 'WeightedEnsembleModel', 'StackerEnsembleModel_KNN'}
Bagging used: True  (with 8 folds)
Multi-layer stack-ensembling used: False 
Feature Metadata (Processed):
(raw dtype, special dtypes):
('float', []) : 1 | ['amt']
*** End of fit() summary ***
/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/autogluon/core/utils/plots.py:169: UserWarning: AutoGluon summary plots cannot be created because bokeh is not installed. To see plots, please do: "pip install bokeh==2.0.1"
  warnings.warn('AutoGluon summary plots cannot be created because bokeh is not installed. To see plots, please do: "pip install bokeh==2.0.1"')
{'model_types': {'KNeighborsUnif_BAG_L1': 'StackerEnsembleModel_KNN',
  'KNeighborsDist_BAG_L1': 'StackerEnsembleModel_KNN',
  'LightGBMXT_BAG_L1': 'StackerEnsembleModel_LGB',
  'LightGBM_BAG_L1': 'StackerEnsembleModel_LGB',
  'RandomForestGini_BAG_L1': 'StackerEnsembleModel_RF',
  'RandomForestEntr_BAG_L1': 'StackerEnsembleModel_RF',
  'CatBoost_BAG_L1': 'StackerEnsembleModel_CatBoost',
  'ExtraTreesGini_BAG_L1': 'StackerEnsembleModel_XT',
  'ExtraTreesEntr_BAG_L1': 'StackerEnsembleModel_XT',
  'NeuralNetFastAI_BAG_L1': 'StackerEnsembleModel_NNFastAiTabular',
  'XGBoost_BAG_L1': 'StackerEnsembleModel_XGBoost',
  'NeuralNetTorch_BAG_L1': 'StackerEnsembleModel_TabularNeuralNetTorch',
  'LightGBMLarge_BAG_L1': 'StackerEnsembleModel_LGB',
  'WeightedEnsemble_L2': 'WeightedEnsembleModel'},
 'model_performance': {'KNeighborsUnif_BAG_L1': 0.9170115598687028,
  'KNeighborsDist_BAG_L1': 0.9083773369487655,
  'LightGBMXT_BAG_L1': 0.9233623519337805,
  'LightGBM_BAG_L1': 0.9261452832881404,
  'RandomForestGini_BAG_L1': 0.9021692593121164,
  'RandomForestEntr_BAG_L1': 0.9021692593121164,
  'CatBoost_BAG_L1': 0.9247181390038532,
  'ExtraTreesGini_BAG_L1': 0.9068074782360497,
  'ExtraTreesEntr_BAG_L1': 0.9070215498786928,
  'NeuralNetFastAI_BAG_L1': 0.925859854431283,
  'XGBoost_BAG_L1': 0.9245754245754245,
  'NeuralNetTorch_BAG_L1': 0.9259312116454974,
  'LightGBMLarge_BAG_L1': 0.9261452832881404,
  'WeightedEnsemble_L2': 0.9261452832881404},
 'model_best': 'WeightedEnsemble_L2',
 'model_paths': {'KNeighborsUnif_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/KNeighborsUnif_BAG_L1/',
  'KNeighborsDist_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/KNeighborsDist_BAG_L1/',
  'LightGBMXT_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/LightGBMXT_BAG_L1/',
  'LightGBM_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/LightGBM_BAG_L1/',
  'RandomForestGini_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/RandomForestGini_BAG_L1/',
  'RandomForestEntr_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/RandomForestEntr_BAG_L1/',
  'CatBoost_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/CatBoost_BAG_L1/',
  'ExtraTreesGini_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/ExtraTreesGini_BAG_L1/',
  'ExtraTreesEntr_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/ExtraTreesEntr_BAG_L1/',
  'NeuralNetFastAI_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/NeuralNetFastAI_BAG_L1/',
  'XGBoost_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/XGBoost_BAG_L1/',
  'NeuralNetTorch_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/NeuralNetTorch_BAG_L1/',
  'LightGBMLarge_BAG_L1': 'AutogluonModels/ag-20240202_071009/models/LightGBMLarge_BAG_L1/',
  'WeightedEnsemble_L2': 'AutogluonModels/ag-20240202_071009/models/WeightedEnsemble_L2/'},
 'model_fit_times': {'KNeighborsUnif_BAG_L1': 0.0038001537322998047,
  'KNeighborsDist_BAG_L1': 0.0033333301544189453,
  'LightGBMXT_BAG_L1': 0.377455472946167,
  'LightGBM_BAG_L1': 0.7565624713897705,
  'RandomForestGini_BAG_L1': 0.4426383972167969,
  'RandomForestEntr_BAG_L1': 0.511042594909668,
  'CatBoost_BAG_L1': 1.7650575637817383,
  'ExtraTreesGini_BAG_L1': 0.33634114265441895,
  'ExtraTreesEntr_BAG_L1': 0.3614346981048584,
  'NeuralNetFastAI_BAG_L1': 12.193488121032715,
  'XGBoost_BAG_L1': 0.49982166290283203,
  'NeuralNetTorch_BAG_L1': 13.514495372772217,
  'LightGBMLarge_BAG_L1': 1.000464916229248,
  'WeightedEnsemble_L2': 2.9422225952148438},
 'model_pred_times': {'KNeighborsUnif_BAG_L1': 0.01584339141845703,
  'KNeighborsDist_BAG_L1': 0.015021324157714844,
  'LightGBMXT_BAG_L1': 0.03459787368774414,
  'LightGBM_BAG_L1': 0.03014206886291504,
  'RandomForestGini_BAG_L1': 0.2558248043060303,
  'RandomForestEntr_BAG_L1': 0.23952651023864746,
  'CatBoost_BAG_L1': 0.00545501708984375,
  'ExtraTreesGini_BAG_L1': 0.2760922908782959,
  'ExtraTreesEntr_BAG_L1': 0.27741408348083496,
  'NeuralNetFastAI_BAG_L1': 0.1440262794494629,
  'XGBoost_BAG_L1': 0.019437313079833984,
  'NeuralNetTorch_BAG_L1': 0.06808900833129883,
  'LightGBMLarge_BAG_L1': 0.02382516860961914,
  'WeightedEnsemble_L2': 0.015074968338012695},
 'num_bag_folds': 8,
 'max_stack_level': 2,
 'num_classes': 2,
 'model_hyperparams': {'KNeighborsUnif_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True,
   'use_child_oof': True},
  'KNeighborsDist_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True,
   'use_child_oof': True},
  'LightGBMXT_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True},
  'LightGBM_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True},
  'RandomForestGini_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True,
   'use_child_oof': True},
  'RandomForestEntr_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True,
   'use_child_oof': True},
  'CatBoost_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True},
  'ExtraTreesGini_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True,
   'use_child_oof': True},
  'ExtraTreesEntr_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True,
   'use_child_oof': True},
  'NeuralNetFastAI_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True},
  'XGBoost_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True},
  'NeuralNetTorch_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True},
  'LightGBMLarge_BAG_L1': {'use_orig_features': True,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True},
  'WeightedEnsemble_L2': {'use_orig_features': False,
   'max_base_models': 25,
   'max_base_models_per_type': 5,
   'save_bag_folds': True}},
 'leaderboard':                       model  score_val  pred_time_val   fit_time  \
 0      LightGBMLarge_BAG_L1   0.926145       0.023825   1.000465   
 1           LightGBM_BAG_L1   0.926145       0.030142   0.756562   
 2       WeightedEnsemble_L2   0.926145       0.045217   3.698785   
 3     NeuralNetTorch_BAG_L1   0.925931       0.068089  13.514495   
 4    NeuralNetFastAI_BAG_L1   0.925860       0.144026  12.193488   
 5           CatBoost_BAG_L1   0.924718       0.005455   1.765058   
 6            XGBoost_BAG_L1   0.924575       0.019437   0.499822   
 7         LightGBMXT_BAG_L1   0.923362       0.034598   0.377455   
 8     KNeighborsUnif_BAG_L1   0.917012       0.015843   0.003800   
 9     KNeighborsDist_BAG_L1   0.908377       0.015021   0.003333   
 10    ExtraTreesEntr_BAG_L1   0.907022       0.277414   0.361435   
 11    ExtraTreesGini_BAG_L1   0.906807       0.276092   0.336341   
 12  RandomForestEntr_BAG_L1   0.902169       0.239527   0.511043   
 13  RandomForestGini_BAG_L1   0.902169       0.255825   0.442638   
 
     pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  \
 0                 0.023825           1.000465            1       True   
 1                 0.030142           0.756562            1       True   
 2                 0.015075           2.942223            2       True   
 3                 0.068089          13.514495            1       True   
 4                 0.144026          12.193488            1       True   
 5                 0.005455           1.765058            1       True   
 6                 0.019437           0.499822            1       True   
 7                 0.034598           0.377455            1       True   
 8                 0.015843           0.003800            1       True   
 9                 0.015021           0.003333            1       True   
 10                0.277414           0.361435            1       True   
 11                0.276092           0.336341            1       True   
 12                0.239527           0.511043            1       True   
 13                0.255825           0.442638            1       True   
 
     fit_order  
 0          13  
 1           4  
 2          14  
 3          12  
 4          10  
 5           7  
 6          11  
 7           3  
 8           1  
 9           2  
 10          9  
 11          8  
 12          6  
 13          5  }
predictr.get_model_names()
['KNeighborsUnif_BAG_L1',
 'KNeighborsDist_BAG_L1',
 'LightGBMXT_BAG_L1',
 'LightGBM_BAG_L1',
 'RandomForestGini_BAG_L1',
 'RandomForestEntr_BAG_L1',
 'CatBoost_BAG_L1',
 'ExtraTreesGini_BAG_L1',
 'ExtraTreesEntr_BAG_L1',
 'NeuralNetFastAI_BAG_L1',
 'XGBoost_BAG_L1',
 'NeuralNetTorch_BAG_L1',
 'LightGBMLarge_BAG_L1',
 'WeightedEnsemble_L2']
models = predictr._trainer.model_graph.nodes

# Evaluate every trained model on the test set and collect the results.
# DataFrame.append is deprecated (removed in pandas 2.0) — the original
# emitted a FutureWarning on every iteration — so accumulate rows in a
# list and build the DataFrame once at the end.
rows = []
for model_name in models:
    # per-model metric dict (accuracy, f1, ...) from AutoGluon
    eval_result = predictr.evaluate(tst, model=model_name)
    rows.append({'Model': model_name, 'Evaluation': eval_result})

results = pd.DataFrame(rows, columns=['Model', 'Evaluation'])
Evaluation: accuracy on test data: 0.8591408591408591
Evaluations on test data:
{
    "accuracy": 0.8591408591408591,
    "balanced_accuracy": 0.8591408591408591,
    "mcc": 0.7333692346586707,
    "roc_auc": 0.9238014178074118,
    "f1": 0.8433333333333334,
    "precision": 0.9499374217772215,
    "recall": 0.7582417582417582
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8546453546453546
Evaluations on test data:
{
    "accuracy": 0.8546453546453546,
    "balanced_accuracy": 0.8546453546453547,
    "mcc": 0.7243419861043393,
    "roc_auc": 0.9035307017324999,
    "f1": 0.8382434685936633,
    "precision": 0.9448621553884712,
    "recall": 0.7532467532467533
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8576423576423576
Evaluations on test data:
{
    "accuracy": 0.8576423576423576,
    "balanced_accuracy": 0.8576423576423576,
    "mcc": 0.7357286217082192,
    "roc_auc": 0.9497830951377405,
    "f1": 0.8387705072600414,
    "precision": 0.9669565217391304,
    "recall": 0.7405927405927406
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8519813519813519
Evaluations on test data:
{
    "accuracy": 0.8519813519813519,
    "balanced_accuracy": 0.8519813519813519,
    "mcc": 0.7293218533016024,
    "roc_auc": 0.9617966670913722,
    "f1": 0.8297261061099407,
    "precision": 0.9765554553651938,
    "recall": 0.7212787212787213
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8463203463203464
Evaluations on test data:
{
    "accuracy": 0.8463203463203464,
    "balanced_accuracy": 0.8463203463203464,
    "mcc": 0.7069430711855968,
    "roc_auc": 0.9305546823029339,
    "f1": 0.829232192414431,
    "precision": 0.9329725228975854,
    "recall": 0.7462537462537463
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8463203463203464
Evaluations on test data:
{
    "accuracy": 0.8463203463203464,
    "balanced_accuracy": 0.8463203463203464,
    "mcc": 0.7069430711855968,
    "roc_auc": 0.9305546823029339,
    "f1": 0.829232192414431,
    "precision": 0.9329725228975854,
    "recall": 0.7462537462537463
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8613053613053613
Evaluations on test data:
{
    "accuracy": 0.8613053613053613,
    "balanced_accuracy": 0.8613053613053614,
    "mcc": 0.7411229848770046,
    "roc_auc": 0.961435722674484,
    "f1": 0.8439782730848474,
    "precision": 0.9644691780821918,
    "recall": 0.7502497502497503
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8553113553113553
Evaluations on test data:
{
    "accuracy": 0.8553113553113553,
    "balanced_accuracy": 0.8553113553113554,
    "mcc": 0.7239232471410014,
    "roc_auc": 0.938045415567893,
    "f1": 0.8400515369041046,
    "precision": 0.9390946502057613,
    "recall": 0.7599067599067599
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8546453546453546
Evaluations on test data:
{
    "accuracy": 0.8546453546453546,
    "balanced_accuracy": 0.8546453546453546,
    "mcc": 0.7228538607711172,
    "roc_auc": 0.9375860569666764,
    "f1": 0.8391376451077943,
    "precision": 0.9393564356435643,
    "recall": 0.7582417582417582
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8614718614718615
Evaluations on test data:
{
    "accuracy": 0.8614718614718615,
    "balanced_accuracy": 0.8614718614718615,
    "mcc": 0.740949374063438,
    "roc_auc": 0.9046451938559831,
    "f1": 0.8444278234854151,
    "precision": 0.9628997867803838,
    "recall": 0.7519147519147519
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8578088578088578
Evaluations on test data:
{
    "accuracy": 0.8578088578088578,
    "balanced_accuracy": 0.8578088578088578,
    "mcc": 0.7372429603136902,
    "roc_auc": 0.9617392264744913,
    "f1": 0.8383800151400455,
    "precision": 0.9710653222270934,
    "recall": 0.7375957375957376
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8619713619713619
Evaluations on test data:
{
    "accuracy": 0.8619713619713619,
    "balanced_accuracy": 0.861971361971362,
    "mcc": 0.7423737770295685,
    "roc_auc": 0.9527965097395666,
    "f1": 0.8447856206702864,
    "precision": 0.9649272882805817,
    "recall": 0.7512487512487512
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8514818514818515
Evaluations on test data:
{
    "accuracy": 0.8514818514818515,
    "balanced_accuracy": 0.8514818514818514,
    "mcc": 0.7291804253195859,
    "roc_auc": 0.9607630686551766,
    "f1": 0.8287250384024577,
    "precision": 0.9786848072562359,
    "recall": 0.7186147186147186
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
Evaluation: accuracy on test data: 0.8519813519813519
Evaluations on test data:
{
    "accuracy": 0.8519813519813519,
    "balanced_accuracy": 0.8519813519813519,
    "mcc": 0.7293218533016024,
    "roc_auc": 0.9617966670913722,
    "f1": 0.8297261061099407,
    "precision": 0.9765554553651938,
    "recall": 0.7212787212787213
}
/tmp/ipykernel_3624978/2085143255.py:12: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  results = results.append({'Model': model_name, 'Evaluation': eval_result}, ignore_index=True)
results
Model Evaluation
0 KNeighborsUnif_BAG_L1 {'accuracy': 0.8591408591408591, 'balanced_accuracy': 0.8591408591408591, 'mcc': 0.7333692346586707, 'roc_auc': 0.9238014178074118, 'f1': 0.8433333333333334, 'precision': 0.9499374217772215, 'recall': 0.7582417582417582}
1 KNeighborsDist_BAG_L1 {'accuracy': 0.8546453546453546, 'balanced_accuracy': 0.8546453546453547, 'mcc': 0.7243419861043393, 'roc_auc': 0.9035307017324999, 'f1': 0.8382434685936633, 'precision': 0.9448621553884712, 'recall': 0.7532467532467533}
2 LightGBMXT_BAG_L1 {'accuracy': 0.8576423576423576, 'balanced_accuracy': 0.8576423576423576, 'mcc': 0.7357286217082192, 'roc_auc': 0.9497830951377405, 'f1': 0.8387705072600414, 'precision': 0.9669565217391304, 'recall': 0.7405927405927406}
3 LightGBM_BAG_L1 {'accuracy': 0.8519813519813519, 'balanced_accuracy': 0.8519813519813519, 'mcc': 0.7293218533016024, 'roc_auc': 0.9617966670913722, 'f1': 0.8297261061099407, 'precision': 0.9765554553651938, 'recall': 0.7212787212787213}
4 RandomForestGini_BAG_L1 {'accuracy': 0.8463203463203464, 'balanced_accuracy': 0.8463203463203464, 'mcc': 0.7069430711855968, 'roc_auc': 0.9305546823029339, 'f1': 0.829232192414431, 'precision': 0.9329725228975854, 'recall': 0.7462537462537463}
5 RandomForestEntr_BAG_L1 {'accuracy': 0.8463203463203464, 'balanced_accuracy': 0.8463203463203464, 'mcc': 0.7069430711855968, 'roc_auc': 0.9305546823029339, 'f1': 0.829232192414431, 'precision': 0.9329725228975854, 'recall': 0.7462537462537463}
6 CatBoost_BAG_L1 {'accuracy': 0.8613053613053613, 'balanced_accuracy': 0.8613053613053614, 'mcc': 0.7411229848770046, 'roc_auc': 0.961435722674484, 'f1': 0.8439782730848474, 'precision': 0.9644691780821918, 'recall': 0.7502497502497503}
7 ExtraTreesGini_BAG_L1 {'accuracy': 0.8553113553113553, 'balanced_accuracy': 0.8553113553113554, 'mcc': 0.7239232471410014, 'roc_auc': 0.938045415567893, 'f1': 0.8400515369041046, 'precision': 0.9390946502057613, 'recall': 0.7599067599067599}
8 ExtraTreesEntr_BAG_L1 {'accuracy': 0.8546453546453546, 'balanced_accuracy': 0.8546453546453546, 'mcc': 0.7228538607711172, 'roc_auc': 0.9375860569666764, 'f1': 0.8391376451077943, 'precision': 0.9393564356435643, 'recall': 0.7582417582417582}
9 NeuralNetFastAI_BAG_L1 {'accuracy': 0.8614718614718615, 'balanced_accuracy': 0.8614718614718615, 'mcc': 0.740949374063438, 'roc_auc': 0.9046451938559831, 'f1': 0.8444278234854151, 'precision': 0.9628997867803838, 'recall': 0.7519147519147519}
10 XGBoost_BAG_L1 {'accuracy': 0.8578088578088578, 'balanced_accuracy': 0.8578088578088578, 'mcc': 0.7372429603136902, 'roc_auc': 0.9617392264744913, 'f1': 0.8383800151400455, 'precision': 0.9710653222270934, 'recall': 0.7375957375957376}
11 NeuralNetTorch_BAG_L1 {'accuracy': 0.8619713619713619, 'balanced_accuracy': 0.861971361971362, 'mcc': 0.7423737770295685, 'roc_auc': 0.9527965097395666, 'f1': 0.8447856206702864, 'precision': 0.9649272882805817, 'recall': 0.7512487512487512}
12 LightGBMLarge_BAG_L1 {'accuracy': 0.8514818514818515, 'balanced_accuracy': 0.8514818514818514, 'mcc': 0.7291804253195859, 'roc_auc': 0.9607630686551766, 'f1': 0.8287250384024577, 'precision': 0.9786848072562359, 'recall': 0.7186147186147186}
13 WeightedEnsemble_L2 {'accuracy': 0.8519813519813519, 'balanced_accuracy': 0.8519813519813519, 'mcc': 0.7293218533016024, 'roc_auc': 0.9617966670913722, 'f1': 0.8297261061099407, 'precision': 0.9765554553651938, 'recall': 0.7212787212787213}
results
[{'model': 'KNeighborsUnif_BAG_L1',
  'acc': 0.8541458541458542,
  'pre': 0.9411032766486935,
  'rec': 0.7555777555777555,
  'f1': 0.838197266346509,
  'auc': 0.9230023500752773},
 {'model': 'KNeighborsDist_BAG_L1',
  'acc': 0.8481518481518482,
  'pre': 0.933997509339975,
  'rec': 0.7492507492507493,
  'f1': 0.8314855875831486,
  'auc': 0.8990750064676138},
 {'model': 'LightGBMXT_BAG_L1',
  'acc': 0.8571428571428571,
  'pre': 0.9669133652590335,
  'rec': 0.7395937395937396,
  'f1': 0.8381132075471698,
  'auc': 0.9521417088849656},
 {'model': 'LightGBM_BAG_L1',
  'acc': 0.8486513486513486,
  'pre': 0.9746146872166818,
  'rec': 0.7159507159507159,
  'f1': 0.8254943367248992,
  'auc': 0.9625852003474381},
 {'model': 'RandomForestGini_BAG_L1',
  'acc': 0.8426573426573427,
  'pre': 0.925206611570248,
  'rec': 0.7455877455877455,
  'f1': 0.825742209109349,
  'auc': 0.9291861984169677},
 {'model': 'RandomForestEntr_BAG_L1',
  'acc': 0.8426573426573427,
  'pre': 0.925206611570248,
  'rec': 0.7455877455877455,
  'f1': 0.825742209109349,
  'auc': 0.9291861984169677},
 {'model': 'CatBoost_BAG_L1',
  'acc': 0.8594738594738595,
  'pre': 0.965101249461439,
  'rec': 0.745920745920746,
  'f1': 0.8414725770097672,
  'auc': 0.9625810420016214},
 {'model': 'ExtraTreesGini_BAG_L1',
  'acc': 0.8498168498168498,
  'pre': 0.9317714755445952,
  'rec': 0.7549117549117549,
  'f1': 0.8340691685062547,
  'auc': 0.9354622511465667},
 {'model': 'ExtraTreesEntr_BAG_L1',
  'acc': 0.8491508491508492,
  'pre': 0.9298892988929889,
  'rec': 0.7552447552447552,
  'f1': 0.8335170893054022,
  'auc': 0.9351754916190481},
 {'model': 'NeuralNetFastAI_BAG_L1',
  'acc': 0.8596403596403597,
  'pre': 0.9651162790697675,
  'rec': 0.7462537462537463,
  'f1': 0.8416901408450705,
  'auc': 0.9173069133109093},
 {'model': 'XGBoost_BAG_L1',
  'acc': 0.8533133533133533,
  'pre': 0.9715555555555555,
  'rec': 0.7279387279387279,
  'f1': 0.8322863125832858,
  'auc': 0.9626036634028641},
 {'model': 'NeuralNetTorch_BAG_L1',
  'acc': 0.8596403596403597,
  'pre': 0.9655172413793104,
  'rec': 0.745920745920746,
  'f1': 0.8416306594025925,
  'auc': 0.9554923930548306},
 {'model': 'LightGBMLarge_BAG_L1',
  'acc': 0.846986346986347,
  'pre': 0.9771062271062271,
  'rec': 0.7106227106227107,
  'f1': 0.8228262965105071,
  'auc': 0.9620277602295586},
 {'model': 'WeightedEnsemble_L2',
  'acc': 0.855977355977356,
  'pre': 0.9684487291849255,
  'rec': 0.7359307359307359,
  'f1': 0.836329233680227,
  'auc': 0.9624103834893047}]